# print link to github repo if anyif (file.exists("./.git/config")){ config <-readLines("./.git/config") url <-grep("url", config, value =TRUE) url <-gsub("\\turl = |.git$", "", url)cat("\nSource code and data found at [", url, "](", url, ")", sep ="") }
# ggplot font sizebs <-12# custom functionspart_var <-function(model) {var_components_pc1 <-as.data.frame(VarCorr(model))total_variance <-sum(var_components_pc1$vcov)var_components_pc1$proportion <- var_components_pc1$vcov / total_variancereturn(var_components_pc1) }# plot variance partitioning resultsgraph_part_var <-function(levels, data,by =NULL, response,full =TRUE) {if (!is.null(by)) {if (full) { by_levels <-c(unique(data[, by]), "full") } else { by_levels <-unique(data[, by]) } } else by_levels <-"full" var_by_list <-lapply(by_levels, function(x) {if (x !="full") { formula <-paste0(response, " ~ (1 | ", paste(levels, collapse =" / "), ")")cat(paste0(x, ":", formula), "\n") dat <-droplevels(data[data[, by] == x, ]) } else { dat <-droplevels(data)if (!is.null(by)){ formula <-paste0(response, " ~ (1 | ", paste(levels, collapse =" / "), ") + (1 | ", by, ")") } else { formula <-paste0(response, " ~ (1 | ", paste(levels, collapse =" / "), ")") }cat("full:", formula, "\n") } model <-lmer(formula, data = dat) var_components <-part_var(model) var_components <- var_components[var_components$grp !="Residual", ]if (!is.null(by)) { var_components <- var_components[var_components$grp != by, ] } var_components$level <-factor(rev(levels), levels =rev(levels))# sample sizes var_components$n <-sapply(levels, function(y)length(unique(dat[, names(dat) == y]))) var_components$prop <-as.character(round(var_components$proportion, 2)) var_components$prop[var_components$prop =="0"] <-"< 0.01" var_components$prop <-paste(var_components$prop," (n = ", var_components$n,")",sep ="") var_components$by_levels <- x var_components$perc <-paste(round(var_components$proportion *100, 0),"% (n = ", var_components$n,")",sep ="" )return(var_components) }) var_by_df <-do.call(rbind, var_by_list) var_by_df$by_levels <-factor(var_by_df$by_levels, levels = by_levels) gg_out <-ggplot(var_by_df, aes(x = level, y = proportion, fill = level)) +geom_bar(stat ="identity") +theme_classic(base_size =20) +labs(x ="Level", y ="Explained variance (%)", fill ="Factor") +facet_wrap( ~ by_levels, ncol =1) +theme(axis.text.x =element_text(angle =45,vjust =1,hjust =1 )) +geom_text(aes(label = perc), vjust =-0.5, size =5) +scale_fill_viridis_d(option ="G",begin =0.1,end =0.9,guide ="none", direction =-1 ) +scale_y_continuous(labels =label_percent(), limits =c(0, max(var_by_df$proportion) +0.16))# Data for the nested ovalsoval_data <-data.frame(# label = c("A", "B", "C", "D"),x0 =c(0, 0, 0, 0, 0), # Center x position for all ovalsy0 =c(-1.7, -0.4, 0.7, 2.2, 3.3), # Center y position for all ovalsb =c(1.5, 3, 4.5, 6, 7.5), # Semi-major axis (horizontal radius)a =c(2, 4, 6, 8, 10) +1, # Semi-minor axis (vertical radius)text.pos =c(-4.7, -2, 1, 3.2, 6) +3# Radius for each circle)oval_data <- oval_data[seq_along(levels), ]oval_data$label <-rev(levels)oval_data$size <- oval_data$a * oval_data$b# oval_data <- oval_data[oval_data$label %in% c("Site", "Bird"), ] # Order by y0 for correct layering# oval_data$label <- factor(oval_data$label, levels = rev(oval_data$label))oval_data$label <-factor(oval_data$label, levels = oval_data$label[order(oval_data$size, decreasing =FALSE)])oval_data$label.color <- viridis::mako(nrow(oval_data), begin =0.1, end =0.9, direction =-1, alpha =1)oval_data$label.color <-factor(oval_data$label.color, levels = oval_data$label.color[order(oval_data$size, decreasing =TRUE)])# oval_data <- oval_data[1:4,]# Plot using ggplot2 and ggforcegg_oval <-ggplot(oval_data, group = label) +geom_ellipse(aes(x0 = x0, y0 = y0, a = a, b = b, angle =0, fill = label.color), color ="white") +geom_text(aes(x = x0, y = text.pos, label = label), size =7, color =c(rep("black", nrow(oval_data) -1), "white")) +# scale_fill_viridis_d(option = "G", begin = 0.1, end = 0.9, guide = "none", direction = 1) + # Add labels in the centerscale_fill_identity() +# Use the colors as specified in the datatheme_void() +# Remove background, axes, and gridtheme(legend.position ="none") +coord_fixed() #+ # Keep aspect ratio fixed for true ovals#ylim(c(-12, 6))print(gg_oval)return(gg_out) }
Purpose
Quantify contribution of different social levels of organization in the geographic variation of parrots calls.
Analysis flowchart
flowchart LR
A(Read data) --> B(Plot acoustic space)
B --> C(Regression models<br>to partition variance)
C --> D(Plot variance partition)
style A fill:#44015466
style B fill:#3E4A894D
style C fill:#26828E4D
style D fill:#6DCD594D
Load packages
Code
# knitr is require for creating html/pdf/word reports formatR is# used for soft-wrapping code# install/ load packagessketchy::load_packages(packages =c("knitr", "formatR", "warbleR","ggplot2", "maRce10/PhenotypeSpace", "lme4", "scales", "Rtsne","umap", "ggforce", "DT"))
# pcapca <-prcomp(tbp_mfcc[, -c(1:2)], center =TRUE, scale. =TRUE)tbp_calls$PC1.mfcc <- pca$x[, 1]tbp_calls$PC2.mfcc <- pca$x[, 2]ss_type <-space_similarity(type ~ PC1.mfcc + PC2.mfcc, data =as.data.frame(tbp_calls),method ="density.overlap")# graph bidimensional space with gpplot coloring by typeggplot(tbp_calls, aes(x = PC1.mfcc, y = PC2.mfcc, color = type)) +geom_point() +scale_colour_viridis_d(option ="G", begin =0.2,end =0.8, name ="Call type") +theme_classic(base_size = bs) +labs(title =paste0("Mean density overlap:", round(ss_type$mean.overlap,2)), x ="PC1", y ="PC2")
1.2 MFCCs acoustic space by site for each call type
PCA is computed separately for each call type
Code
tbp_calls$PC1.mfcc <-NAtbp_calls$PC2.mfcc <-NAfor (i inunique(tbp_calls$type)) { pca <-prcomp(tbp_mfcc[tbp_calls$type == i, -c(1:2)], center =TRUE,scale. =TRUE) tbp_calls$PC1.mfcc[tbp_calls$type == i] <- pca$x[, 1] tbp_calls$PC2.mfcc[tbp_calls$type == i] <- pca$x[, 2]# umap_l <- umap(tbp_mfcc[tbp_calls$type == i, -c(1:2)])# tbp_calls$PC1.mfcc[tbp_calls$type == i] <- umap_l$layout[,# 1] tbp_calls$PC2.mfcc[tbp_calls$type == i] <-# umap_l$layout[, 2]}ss_type_barks <-space_similarity(Site ~ PC1.mfcc + PC2.mfcc, data =as.data.frame(tbp_calls[tbp_calls$type =="barks", ]), method ="density.overlap")ss_type_laughs <-space_similarity(Site ~ PC1.mfcc + PC2.mfcc, data =as.data.frame(tbp_calls[tbp_calls$type =="laughs", ]), method ="density.overlap")mean_ovlp <-mean(c(ss_type_barks$mean.overlap, ss_type_laughs$mean.overlap))ggplot(tbp_calls, aes(x = PC1.mfcc, y = PC2.mfcc, color = Site)) +geom_point() +scale_colour_viridis_d(option ="G", begin =0.2,end =0.8, name ="Site") +theme_classic(base_size = bs) +facet_grid(~type,scales ="free") +labs(title =paste0("Mean density overlap:",round(mean_ovlp, 2)), x ="PC1", y ="PC2")
1.3 Partition variance contribution
Code
mod_barks <-lmer(PC1.mfcc ~ (1| Site/UniqueBird), data = tbp_calls[tbp_calls$type =="barks", ])graph_part_var(levels =c("Site", "UniqueBird"), data =as.data.frame(tbp_calls),by ="type", response ="PC1.mfcc", full =FALSE)
laughs:PC1.mfcc ~ (1 | Site / UniqueBird)
barks:PC1.mfcc ~ (1 | Site / UniqueBird)
Warning: Using the `size` aesthetic in this geom was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` in the `default_aes` field and elsewhere instead.
Warning: data contain duplicated points
Warning: data contain duplicated points
Code
# graph bidimensional space with gpplot coloring by typeggplot(yna_calls, aes(x = PC1.mfcc, y = PC2.mfcc, color = type)) +geom_point() +scale_colour_viridis_d(option ="G", begin =0.2,end =0.8, name ="Call type") +theme_classic(base_size = bs) +labs(title =paste0("Mean density overlap:", round(ss_type$mean.overlap,2)), x ="PC1", y ="PC2")
2.2 MFCCs acoustic space by site for each call type
Warning: data contain duplicated points
Warning: data contain duplicated points
Code
mean_ovlp <-mean(c(ss_type_1994$mean.overlap, ss_type_2005$mean.overlap))ggplot(yna_calls, aes(x = PC1.mfcc, y = PC2.mfcc, color = Site)) +geom_point() +scale_colour_viridis_d(option ="G", begin =0.2,end =0.8, name ="Site") +theme_classic(base_size = bs) +facet_grid(~type,scales ="free") +labs(title =paste0("Mean density overlap:",round(mean_ovlp, 2)), x ="PC1", y ="PC2")
2.3 Partition variance contribution
Code
graph_part_var(levels =c("Dialect", "Site", "UniqueBird"), data =as.data.frame(yna_calls),by ="type", response ="PC1.mfcc", full =TRUE)
1994:PC1.mfcc ~ (1 | Dialect / Site / UniqueBird)
2005:PC1.mfcc ~ (1 | Dialect / Site / UniqueBird)
Warning: data contain duplicated points
Warning: data contain duplicated points
Warning: data contain duplicated points
Code
# graph bidimensional space with gpplot coloring by typeggplot(mpa_calls, aes(x = PC1.mfcc, y = PC2.mfcc, color = country)) +geom_point() +scale_colour_viridis_d(option ="G", begin =0.2,end =0.8, name ="Country") +theme_classic(base_size = bs) +labs(title =paste0("Mean density overlap:", round(ss_type$mean.overlap,2)), x ="PC1", y ="PC2")
3.2 MFCCs acoustic space by site for each call type
full: PC1.mfcc ~ (1 | region / dept_state / site / Bird_ID)
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
unable to evaluate scaled gradient
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
Model failed to converge: degenerate Hessian with 1 negative eigenvalues
Code
graph_part_var(levels =c("region", "site", "Bird_ID"), data = mpa_calls[!is.na(mpa_calls$Bird_ID), ], response ="PC1.mfcc", full =TRUE)
full: PC1.mfcc ~ (1 | region / site / Bird_ID)
Code
graph_part_var(levels =c("region", "site"), data = mpa_calls, response ="PC1.mfcc",full =TRUE, by ="range")
Native:PC1.mfcc ~ (1 | region / site)
Introduced:PC1.mfcc ~ (1 | region / site)
full: PC1.mfcc ~ (1 | region / site) + (1 | range)
Code
graph_part_var(levels =c("region", "dept_state", "site"), data =as.data.frame(mpa_calls),response ="PC1.mfcc", full =TRUE, by ="range")
Native:PC1.mfcc ~ (1 | region / dept_state / site)